Appendix 2: Landscapes Landuse and forest cover at local and landscape scale

load(here("data","datasets.Rdata")) # datasets
load("models_outputs/models_local_cover.Rdata") # models outputs
load("models_outputs/models_landscape_cover.Rdata") # models outputs
source("scripts/r2_auxiliary_function.R")
# variance inflation factor function
vif.mer <- function (fit) {
    ## adapted from rms::vif
    v <- vcov(fit)
    nam <- names(fixef(fit))
    ## exclude intercepts
    ns <- sum(1 * (nam == "Intercept" | nam == "(Intercept)"))
    if (ns > 0) {
        v <- v[-(1:ns), -(1:ns), drop = FALSE]
        nam <- nam[-(1:ns)]
    }
    d <- diag(v)^0.5
    v <- diag(solve(v/(d %o% d)))
    names(v) <- nam
    v
}

This appendix is a description of the landuse and matrix of the landscapes, togethe with a more specific description of forest cover variables at both local and landscape scales. We present the baseline models for the selection of the best scale for the local forest cover variable for each dataset. For the local scale, we measured the percentage of forest cover within buffers of 400, 600 and 800 m around each sampling site. For the landscape forest cover, we used the 2 km buffer around the landscape centroid.

1. Landuse and matrix description in landscapes

lu <- landuse %>%
  mutate(landscape = fct_reorder(landscape,forest,max)) %>%
  pivot_longer(3:10, names_to = "landuse", values_to = "value") %>%
  mutate(landuse = fct_relevel(landuse, 
                               "forest", "pasture", "coffee", "eucalyptus",
                               "sugarcane", "urban", "water","other")) 

Percentage of each landuse type per landscape

# label colors for each landuse
cores <- c("#549E3E", "#FDF5B5", "#D97853", # for pas coffee
           "#BCDE93", "#FFBBFF", "#BDB39A", # euc sugar water
          "#7AC5CD", "#8B7D6B") # water other
lu %>% mutate(landuse = fct_rev(landuse)) %>%
ggplot(aes(y=landscape, x=value, fill=landuse)) +
  geom_col() +
  facet_grid(matrix~., scales="free_y",space='free_y', switch="y",
             labeller = as_labeller(c("high-quality" = "high-quality landscapes",
                                      "low-quality" = "Low-quality landscapes"))) +
  scale_fill_manual(values=rev(cores)) +
  theme(legend.position = "bottom",
        strip.placement= "outside")+
  xlab("Landuse (%)") + ylab("") + 
  ggtitle("Landuse composition")

Proportions of landuse types among high- and low-quality landscapes

ggplot(lu, aes(x=matrix, y=value,fill=landuse, col=landuse)) +
  facet_wrap(~landuse, ncol=4) +
   geom_violin(fill="white")+
  geom_dotplot(stackdir = "center", dotsize=1.5,
               binaxis = "y") +
  stat_summary(fun=median, size=0.1, fatten=5, col="black",
                         geom="crossbar", width=0.5)+
  scale_fill_manual(values=cores )+
  scale_color_manual(values=cores ) +
  theme(legend.position = "none") +
  xlab("Matrix quality") + ylab("Landuse (%)")

Summary table

lu %>% group_by(landuse, matrix) %>% summarise(min = min(value),
                                               mean=mean(value),
                                               sd = sd(value),                                                                             median=median(value),
                                               max = max(value)) %>% 
  kable(digits = 1)
landuse matrix min mean sd median max
forest high-quality 12.0 30.8 12.4 29.0 55.5
forest low-quality 6.9 25.3 11.9 26.8 46.0
pasture high-quality 20.4 35.5 10.2 34.7 49.5
pasture low-quality 31.7 46.9 11.1 44.8 69.1
coffee high-quality 12.4 21.7 6.8 19.5 32.9
coffee low-quality 0.0 0.0 0.0 0.0 0.0
eucalyptus high-quality 0.0 1.6 3.2 0.4 10.1
eucalyptus low-quality 4.3 18.1 8.7 16.9 37.7
sugarcane high-quality 0.0 4.2 7.9 0.0 20.6
sugarcane low-quality 0.0 0.0 0.0 0.0 0.0
urban high-quality 0.8 2.1 1.7 1.4 5.5
urban low-quality 0.7 7.4 7.5 3.8 26.6
water high-quality 0.0 2.5 4.6 0.3 12.7
water low-quality 0.0 0.9 1.9 0.1 6.2
other high-quality 0.0 1.6 1.1 1.5 3.4
other low-quality 0.0 1.4 2.5 0.0 8.0

Principal Coordinate Analysis

To compare landuse matrices in both high- and low-quality landscapes, we performed a Principal Coordiante analysis with the proportions of each landuse type. - PCoA is more advisable given the nature of the data (sum up 100%, compositional data), we used gower distance tranformation to create a distance matrix among landscapes

cor <- as.numeric(as.factor(landuse$matrix)) # landuse colors

pcoa <- capscale (landuse[,3:10]~1, dist= "gower", scaling = 1)
plot(pcoa, main = 'PCoA (MDS)',  display="si", scaling=2, type="points", 
     ylim=c(-1.1,1),
     col=cor, pch=16)
text (pcoa, display = 'sp', col="blue", scaling=3)
points (pcoa, display = 'si', col=cor, pch=16,scaling=2, cex=1.2)

Landuse heterogeneity - diversity index

Comparing matrix

landuse$simp.div <- diversity(landuse[,3:10], index='simpson')
landuse$shan.div <- diversity(landuse[,3:10], index='shannon')

ggplot(landuse, aes(x=matrix, y=simp.div)) +  geom_boxplot()+
  geom_dotplot(stackdir = "center", dotsize=1,binaxis = "y") +
ggplot(landuse, aes(x=matrix, y=shan.div)) +   geom_boxplot() +
  geom_dotplot(stackdir = "center", dotsize=1, binaxis = "y")

Testing differences in diversity indexes between matrix quality

summary(lm(simp.div ~matrix, data=landuse))
## 
## Call:
## lm(formula = simp.div ~ matrix, data = landuse)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13997 -0.04294  0.01388  0.04218  0.08816 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.69288    0.01761  39.353   <2e-16 ***
## matrixlow-quality -0.05281    0.02342  -2.255   0.0349 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05568 on 21 degrees of freedom
## Multiple R-squared:  0.1949, Adjusted R-squared:  0.1566 
## F-statistic: 5.085 on 1 and 21 DF,  p-value: 0.03494
summary(lm(shan.div ~matrix, data=landuse))
## 
## Call:
## lm(formula = shan.div ~ matrix, data = landuse)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19609 -0.10582 -0.01792  0.09598  0.34078 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.34359    0.04329  31.036   <2e-16 ***
## matrixlow-quality -0.15295    0.05758  -2.656   0.0148 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1369 on 21 degrees of freedom
## Multiple R-squared:  0.2515, Adjusted R-squared:  0.2158 
## F-statistic: 7.055 on 1 and 21 DF,  p-value: 0.01478

Correlation among proportions of landuse types

ggpairs(landuse, columns = 3:7, ggplot2::aes(colour=matrix),
        lower = list(continuous = "smooth"))

2. Relationships among forest cover variables

We calculated Pearson correlation coefficients for forest cover variables in each matrix quality region (Figure S2.). Also, we ploted the range of local forest cover (400 m) within the landscapes to see how local forest cover varies among landscapes in both regions (Figure S2.).

high.cor <- env[which(env$matrix == "high_quality"), c(10,11,12,9)]
colnames(high.cor) <- c("Local 400m", "Local 600m", 
                      "Local 800m","Landscape 2km")
low.cor <- env[which(env$matrix == "low_quality"), c(10,11,12,9)]
colnames(low.cor) <- c("Local 400m", "Local 600m", 
                      "Local 800m","Landscape 2km")

par(mfrow=c(1,2))
corrplot(cor(high.cor),method="number",type="lower", diag=F, 
         order="original", cl.pos = "n",
         mar = c(0,0,0,0)) 
mtext("High-quality matrix", cex=1.2)
corrplot(cor(low.cor),method="number",type="lower", diag=F, 
         order="original",  cl.pos = "n") 
mtext("Low-quality matrix", cex=1.2)
Correlations among forest cover variables in the high-quality (left) and low-quality matrix (right) landscapes.

Correlations among forest cover variables in the high-quality (left) and low-quality matrix (right) landscapes.

ggplot(env, aes(x=forest_site400, y=forest_land)) +
  facet_grid(~matrix, labeller = as_labeller(c(high_quality = "High-quality matrix", low_quality = "Low-quality matrix"))) +
  geom_point() +
  geom_line(aes(shape=landscape)) +
  xlab("% local forest cover (400 m)") + 
  ylab("% landscape forest cover (2 km)") +
  ylim(5,60)
Range of local forest cover (buffer 400 m) within each landscape (buffer 2 km) for both landscapes with different matrix quality. Each line represent a landscape and the dots area the local forest cover for each sampling site.

Range of local forest cover (buffer 400 m) within each landscape (buffer 2 km) for both landscapes with different matrix quality. Each line represent a landscape and the dots area the local forest cover for each sampling site.

3. Scale of effects for local forest cover

We ran different models with each local forest cover variable and selected the scale of effect using AIC model selection and the R2 of the models. The models follow the specification presented in the paper (Modeling section), except that here we did not include trait variables, i.e. we only modeled the occurrence of species according to forest cover.

We used lme4 package (Bates et al. (2015)) to perform a GLMM with binomial (proportion) distribution. An example of the code for each assemblage is as follows:

model <- glmer(cbind(occor, n.visit-occor) ~  
                local.cover + (local.cover|sp) +   
                (1|landscape:sp) + (1|site:sp) +   
                (1|landscape) + (1|site),          
                family=binomial, data=high.spe)
mhigh.spe4a <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site400 + 
                   (forest_site400 |sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=high.spe,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))
mhigh.spe6a <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site600 + 
                   (forest_site600 |sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=high.spe,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))
mhigh.spe8a <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site800 + 
                   (forest_site800|sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=high.spe,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))

mlow.spe4a <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site400 + 
                   (forest_site400 |sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=low.spe,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))
mlow.spe6a <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site600 + 
                   (forest_site600 |sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=low.spe,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))
mlow.spe8a <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site800  +
                   (forest_site800|sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=low.spe,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))

mhigh.gen4a <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site400 + 
                   (forest_site400 |sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=high.gen,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))
mhigh.gen6a <- glmer(cbind(occor, n.visit-occor) ~  
                    forest_site600 + 
                     (forest_site600 |sp) + (1|landscape:sp) + (1|site:sp) + 
                     (1|landscape) + (1|site),
                   family=binomial, data=high.gen,
                   nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                         optCtrl = list(maxfun = 500000)))
mhigh.gen8a <- glmer(cbind(occor, n.visit-occor) ~  
                    forest_site800 + 
                     (forest_site800|sp) + (1|landscape:sp) + (1|site:sp) + 
                     (1|landscape) + (1|site),
                   family=binomial, data=high.gen,
                   nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                          optCtrl = list(maxfun = 500000)))

mlow.gen4a <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site400 + 
                   (forest_site400 |sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=low.gen,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))
mlow.gen6a <- glmer(cbind(occor, n.visit-occor) ~  
                    forest_site600 + 
                     (forest_site600 |sp) + (1|landscape:sp) + (1|site:sp) + 
                     (1|landscape) + (1|site),
                   family=binomial, data=low.gen,
                   nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                           optCtrl = list(maxfun = 500000)))
mlow.gen8a <- glmer(cbind(occor, n.visit-occor) ~  
                    forest_site800 +
                     (forest_site800|sp) + (1|landscape:sp) + (1|site:sp) + 
                     (1|landscape) + (1|site),
                   family=binomial, data=low.gen,
                   nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                              optCtrl = list(maxfun = 500000)))

save(mhigh.spe4a,mhigh.spe6a,mhigh.spe8a,
     mhigh.gen4a, mhigh.gen6a, mhigh.gen8a,
     mlow.spe4a, mlow.spe6a, mlow.spe8a,
     mlow.gen4a,mlow.gen6a,mlow.gen8a,
     file="models_outputs/models_local_cover.Rdata")
taic.ce <- AICtab(mhigh.spe4a, mhigh.spe6a, mhigh.spe8a, sort=F)
class(taic.ce) <- "data.frame"
taic.pe <- AICtab(mlow.spe4a, mlow.spe6a, mlow.spe8a, sort=F)
class(taic.pe) <- "data.frame"
taic.cg <- AICtab(mhigh.gen4a, mhigh.gen6a, mhigh.gen8a, sort=F)
class(taic.cg) <- "data.frame"
taic.pg <- AICtab(mlow.gen4a, mlow.gen6a, mlow.gen8a, sort=F)
class(taic.pg) <- "data.frame"
taic <- rbind(taic.pe, taic.ce, taic.pg, taic.cg)
taic$modelo <- rep(c("400m", "600m", "800m"), 4)
taic$dataset <- rep(c("low.spe", "high.spe", "low.gen", "high.gen"), each=3)

ttaic <- taic %>% dplyr::select(dataset,modelo,dAIC) %>% spread(dataset, dAIC) %>% select(modelo, high.spe, low.spe, high.gen, low.gen)
  
colnames(ttaic) <- c("model", "Low-quality", "High-quality", "Low-quality", "High-quality")
r2high.spe <- data.frame(r.quad(mhigh.spe4a, "forest_site400"),
                       m6a = r.quad(mhigh.spe6a, c("forest_site600"))[,2],
                       m8a = r.quad(mhigh.spe8a, "forest_site800")[,2]) %>%
  mutate_if(is.numeric, round, digits=3)
colnames(r2high.spe)[2] <- "m4a"
rownames(r2high.spe) <- r2high.spe$componente
r2low.spe <- data.frame(r.quad(mlow.spe4a, "forest_site400"),
                       m6a = r.quad(mlow.spe6a, c("forest_site600"))[,2],
                       m8a = r.quad(mlow.spe8a, "forest_site800")[,2]) %>%
  mutate_if(is.numeric, round, digits=3)
colnames(r2low.spe)[2] <- "m4a"
rownames(r2low.spe) <- r2low.spe$componente
r2high.gen <- data.frame(r.quad(mhigh.gen4a, "forest_site400"),
                       m6a = r.quad(mhigh.gen6a, c("forest_site600"))[,2],
                       m8a = r.quad(mhigh.gen8a, "forest_site800")[,2]) %>%
  mutate_if(is.numeric, round, digits=3)
colnames(r2high.gen)[2] <- "m4a"
rownames(r2high.gen) <- r2high.gen$componente
r2low.gen <- data.frame(r.quad(mlow.gen4a, "forest_site400"),
                       m6a = r.quad(mlow.gen6a, c("forest_site600"))[,2],
                       m8a = r.quad(mlow.gen8a, "forest_site800")[,2]) %>%
  mutate_if(is.numeric, round, digits=3)
colnames(r2low.gen)[2] <- "m4a"
rownames(r2low.gen) <- r2low.gen$componente

tudo <- rbind( t(r2low.spe[,-1]),t(r2high.spe[,-1]), t(r2low.gen[,-1]), t(r2high.gen[,-1]))
tudo <- tudo*100
tabis <- cbind(tudo,taic) %>% mutate(dAIC = round(dAIC,2))  
colnames(tabis) <- c("Total", "fixed","env.sp", "lands.sp", "site.sp", "lands", "site", "dAIC", "df", "Model","dataset") 

kable(tabis[,c(10,1:9)], "latex", booktabs=T, row.names = F,
      caption= "Overal and marginal r-squares and model comparisons with Akaike Information Criterion (AIC) for models with different local forest cover scales as predictor for the specialist and generalists species in both regions with different matrix qualities. For the terms see Table 1 (main text). dAIC is the difference in Akaike Information Criterion to the best model; df are the degrees of freedom.") %>%
  kable_styling() %>%
  group_rows("Forest specialist species", 1, 6) %>%
  group_rows("      Low-quality matrix", 1,3) %>%
  group_rows("      High-quality matrix", 4,6) %>%
  group_rows("Forest generalist species", 7, 12) %>%
  group_rows("      Low-quality matrix", 7,9) %>%
  group_rows("      High-quality matrix", 10,12) %>%
  add_header_above(c(" " = 1, " " = 7, "AIC"= 2))
pforest_site400 <- data.frame(forest_site400 = seq(min(high.spe$forest_site400), max(high.spe$forest_site400), length.out=20))
pforest_site400$pred <- predict(mhigh.spe4a, newdata=pforest_site400, re.form=NA, type="response")
pforest_site400$cob <- pforest_site400$forest_site400*sd(high.spe$forest_site400Orig) + mean(high.spe$forest_site400Orig)

pforest_site600 <- data.frame(forest_site600 = seq(min(high.spe$forest_site600), max(high.spe$forest_site600), length.out=20))
pforest_site600$pred <- predict(mhigh.spe6a, newdata=pforest_site600, re.form=NA, type="response")
pforest_site600$cob <- pforest_site600$forest_site600*sd(high.spe$forest_site600Orig) + mean(high.spe$forest_site600Orig)

pforest_site800 <- data.frame(forest_site800 = seq(min(high.spe$forest_site800), max(high.spe$forest_site800), length.out=20))
pforest_site800$pred <- predict(mhigh.spe8a, newdata=pforest_site800, re.form=NA, type="response")
pforest_site800$cob<- pforest_site800$forest_site800*sd(high.spe$forest_site800Orig) + mean(high.spe$forest_site800Orig)


fhigh.spe <- rbind(pforest_site400[,2:3], pforest_site600[,2:3], pforest_site800[,2:3]) %>%
  mutate(modelo = rep(c("forest_site400","forest_site600","forest_site800"), each=20),
         dataset = "high.spe")
#ggplot(fig, aes(x=cob, y=pred, col=modelo)) + geom_line()
pforest_site400 <- data.frame(forest_site400 = seq(min(low.spe$forest_site400), max(low.spe$forest_site400), length.out=20))
pforest_site400$pred <- predict(mlow.spe4a, newdata=pforest_site400, re.form=NA, type="response")
pforest_site400$cob <- pforest_site400$forest_site400*sd(low.spe$forest_site400Orig) + mean(low.spe$forest_site400Orig)

pforest_site600 <- data.frame(forest_site600 = seq(min(low.spe$forest_site600), max(low.spe$forest_site600), length.out=20))
pforest_site600$pred <- predict(mlow.spe6a, newdata=pforest_site600, re.form=NA, type="response")
pforest_site600$cob <- pforest_site600$forest_site600*sd(low.spe$forest_site600Orig) + mean(low.spe$forest_site600Orig)

pforest_site800 <- data.frame(forest_site800 = seq(min(low.spe$forest_site800), max(low.spe$forest_site800), length.out=20))
pforest_site800$pred <- predict(mlow.spe8a, newdata=pforest_site800, re.form=NA, type="response")
pforest_site800$cob<- pforest_site800$forest_site800*sd(low.spe$forest_site800Orig) + mean(low.spe$forest_site800Orig)

flow.spe <- rbind(pforest_site400[,2:3], pforest_site600[,2:3], pforest_site800[,2:3]) %>%
  mutate(modelo = rep(c("forest_site400","forest_site600", "forest_site800"), each=20),
         dataset = "low.spe")
pforest_site400 <- data.frame(forest_site400 = seq(min(high.gen$forest_site400), max(high.gen$forest_site400), length.out=20))
pforest_site400$pred <- predict(mhigh.gen4a, newdata=pforest_site400, re.form=NA, type="response")
pforest_site400$cob <- pforest_site400$forest_site400*sd(high.gen$forest_site400Orig) + mean(high.gen$forest_site400Orig)
 
pforest_site600 <- data.frame(forest_site600 = seq(min(high.gen$forest_site600), max(high.gen$forest_site600), length.out=20))
pforest_site600$pred <- predict(mhigh.gen6a, newdata=pforest_site600, re.form=NA, type="response")
pforest_site600$cob <- pforest_site600$forest_site600*sd(high.gen$forest_site600Orig) + mean(high.gen$forest_site600Orig)

pforest_site800 <- data.frame(forest_site800 = seq(min(high.gen$forest_site800), max(high.gen$forest_site800), length.out=20))
pforest_site800$pred <- predict(mhigh.gen8a, newdata=pforest_site800, re.form=NA, type="response")
pforest_site800$cob<- pforest_site800$forest_site800*sd(high.gen$forest_site800Orig) + mean(high.gen$forest_site800Orig)

fhigh.gen <- rbind(pforest_site400[,2:3], pforest_site600[,2:3], pforest_site800[,2:3]) %>%
  mutate(modelo = rep(c("forest_site400","forest_site600","forest_site800"), each=20),
         dataset = "high.gen")
pforest_site400 <- data.frame(forest_site400 = seq(min(low.gen$forest_site400), max(low.gen$forest_site400), length.out=20))
pforest_site400$pred <- predict(mlow.gen4a, newdata=pforest_site400, re.form=NA, type="response")
pforest_site400$cob <- pforest_site400$forest_site400*sd(low.gen$forest_site400Orig) + mean(low.gen$forest_site400Orig)

pforest_site600 <- data.frame(forest_site600 = seq(min(low.gen$forest_site600), max(low.gen$forest_site600), length.out=20))
pforest_site600$pred <- predict(mlow.gen6a, newdata=pforest_site600, re.form=NA, type="response")
pforest_site600$cob <- pforest_site600$forest_site600*sd(low.gen$forest_site600Orig) + mean(low.gen$forest_site600Orig)

pforest_site800 <- data.frame(forest_site800 = seq(min(low.gen$forest_site800), max(low.gen$forest_site800), length.out=20))
pforest_site800$pred <- predict(mlow.gen8a, newdata=pforest_site800, re.form=NA, type="response")
pforest_site800$cob<- pforest_site800$forest_site800*sd(low.gen$forest_site800Orig) + mean(low.gen$forest_site800Orig)

flow.gen <- rbind(pforest_site400[,2:3], pforest_site600[,2:3], pforest_site800[,2:3]) %>%
  mutate(modelo = rep(c("forest_site400", "forest_site600", "forest_site800"),each=20),
         dataset="low.gen")

In Figure S2., we present the occurrence probability predicted for the models with different local forest cover scales for all the assemblages. Predictions were quite similar and decreased with forest cover for the specialists, especially in the low-quality matrix region, and increased or remained flat for the generalists.

labs <- c(high = "High-quality matrix", low="Low-quality matrix",
          spe = "Specialists", gen = "Generalists")
rbind(fhigh.spe, flow.spe,fhigh.gen, flow.gen) %>%
  separate(dataset, c("matrix", "habitat")) %>%
  ggplot(aes(x=cob, y=pred, col=modelo)) + geom_line()+
  facet_grid(habitat~matrix, labeller=as_labeller(labs), scales = "free") +
  scale_color_discrete("Scale", labels=c("400 m", "600 m", "800 m")) +
  scale_x_continuous(limits = c(0,80), name="Local forest cover (%)")+
  scale_y_continuous("Occurrence probability",
                     breaks=c(0,0.02,0.04,0.06,0.08,0.1),
                     expand=expand_scale(add=0), limits=c(0,0.1)) +
  theme(panel.spacing = unit(5, "mm")) +
  geom_hline(yintercept = 0, size=0.8)
Predictions of the models with different local forest cover scales (lines) for specialists and generalists in both regions.

Predictions of the models with different local forest cover scales (lines) for specialists and generalists in both regions.

Residual correlations among species

We evaluated the residuals by Kendall correlations among species and among sites for the 400 m models using the predictions for site:sp random effects (Observation Level Random Effect), following the code provided by Miller, Damschen & Ives (2018). Codes for the species names are presented in the dataset available.

Below we show models residual correlation plots for the specialists in the high-quality matrix landscape. All the other assemblages presented similar results.

nsites = 40
nspp = length(unique(high.spe$sp))
dat <- high.spe

dat$resid <- as.matrix(ranef(mhigh.spe4a)$`site:sp`)
X <- matrix(dat$resid, nrow=nsites, ncol=nspp, byrow=F)
rownames(X) <- unique(dat$site)
colnames(X) <- unique(dat$sp)

# species correlations
corrs.sp <- cor(X, method="kendall")
for(i in 1:nspp) corrs.sp[i,i] <- NA

# site correlations
corrs.site <- cor(t(X), method="kendall")
for(i in 1:nsites) corrs.site[i,i] <- NA

Range of species correlations: -0.41, 0.46.

Range of sites correlations: -0.25, 0.31.

corrplot(corrs.sp, method="square", type="upper",diag=F,tl.cex=0.5,cl.cex=0.5)
Species residual Kendall correlations for the specialist species in the coffee matrix region.

Species residual Kendall correlations for the specialist species in the coffee matrix region.

corrplot(corrs.site, method="square", type="upper",diag=F,tl.cex=0.5,cl.cex=0.5)
Sites residual Kendall correlations for the specialist species in the coffee matrix region.

Sites residual Kendall correlations for the specialist species in the coffee matrix region.

x.sp <- matrix(corrs.sp, ncol=1)
x.site <- matrix(corrs.site, ncol=1)
x.sp <- x.sp[!is.na(x.sp)]
x.site <- x.site[!is.na(x.site)]

# Figure histogram
par(mfrow=c(1,2), mai=c(1,1,.5,.2))
hist(corrs.sp, main="", probability=T, breaks=.1*(-10:10), xlab="Correlation coefficients", ylab="Probability", ylim=c(0,4), cex.lab=1.2)
mtext("Species", side=3,cex=1.2)

hist(corrs.site, main="", probability=T, breaks=.1*(-10:10), xlab="Correlation coefficients", ylab="Probability", ylim=c(0,4), cex.lab=1.2)
mtext("Sites", side=3, cex=1.2)
Histograms of the residual Kendall correlations for the specialists species in the coffee matrix region.

Histograms of the residual Kendall correlations for the specialists species in the coffee matrix region.

4. Including landscape forest cover

After selecting the local forest cover of 400 m radius buffer around each site for all datasets, we included the landscape forest cover (2 km radius buffer around the centroid of the landscape) in the model.

The R syntax example of this model area as follows:

model <- glmer(cbind(occor, n.visit-occor) ~  
                   local.400 + landscape.2k +
                   (local.400 + landscape.2k |sp) + 
                   (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=high.spe)
mhigh.spe42 <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site400 + forest_land +
                   (forest_site400 + forest_land |sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=high.spe,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))
mlow.spe42 <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site400 + forest_land +
                   (forest_site400 + forest_land |sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=low.spe,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))
mhigh.gen42 <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site400 + forest_land +
                   (forest_site400 + forest_land |sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=high.gen,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))
mlow.gen42 <- glmer(cbind(occor, n.visit-occor) ~  
                  forest_site400 + forest_land +
                   (forest_site400 + forest_land |sp) + (1|landscape:sp) + (1|site:sp) + 
                   (1|landscape) + (1|site),
                 family=binomial, data=low.gen,
                  nAGQ = 1, control = glmerControl(optimizer = "bobyqa",
                                  optCtrl = list(maxfun = 500000)))

save(mhigh.spe42, mlow.spe42, mhigh.gen42, mlow.gen42,
     file="models_outputs/models_landscape_cover.Rdata")

Before analysing results, we evaluated possible colinearity between local and landscape forest cover using the Variance Inflation Factor with the code provided by John Lefcheck (https://jonlefcheck.net/2012/12/28/dealing-with-multicollinearity-using-variance-inflation-factors/). With VIF we found no evidence of collinearity between the forest cover scales (Table S2.).

varif <- rbind(vif.mer(mhigh.spe42),vif.mer(mlow.spe42),vif.mer(mhigh.gen42),
      vif.mer(mlow.gen42)) 
rownames(varif) <- c("Coffee", "Pasture", "Coffe", "Pasture")
colnames(varif) <- c("Local", "Landscape")

kable(varif, "latex", booktabs=T, caption= "Variance Inflation Factor index for the variables of local forest cover and landscape forest cover.", digits=2) %>%
  group_rows("Specialists", 1, 2) %>%
  group_rows("Generalists", 3,4) 

Predictions of the models are present in Figure S2.. It is important to notice the differences in 20 and 40% landscape forest cover predictions for the specialists in the low-quality matrix.

pforest_site400 <- data.frame(forest_site400 = seq(min(high.spe$forest_site400),
                                   max(high.spe$forest_site400), length.out=20))
pforest_site400$pred <- predict(mhigh.spe4a, newdata=pforest_site400, re.form=NA, type="response")
pforest_site400$cob <- pforest_site400$forest_site400*sd(high.spe$forest_site400Orig) + mean(high.spe$forest_site400Orig)

porc <- c(20, 40)

p42 <- expand.grid(forest_site400 = seq(min(high.spe$forest_site400), max(high.spe$forest_site400), length.out=10),
                  forest_land = (porc - mean(high.spe$forest_landOrig))/sd(high.spe$forest_landOrig))
p42$pred <- predict(mhigh.spe42, newdata=p42, re.form=NA, type="response")
p42$cob <- p42$forest_site400*sd(high.spe$forest_site400Orig) + mean(high.spe$forest_site400Orig)
p42$cob.paisa <- p42$forest_land*sd(high.spe$forest_landOrig) + mean(high.spe$forest_landOrig)

loc <- pforest_site400[,2:3] %>%
  mutate(cob.paisa=NA, modelo = "4a")
pais <- p42[,3:5] %>%
  mutate(modelo ="42")

fhigh.spe <- rbind(loc, pais) %>% mutate(dataset="high.spe")
pforest_site400 <- data.frame(forest_site400 = seq(min(low.spe$forest_site400), max(low.spe$forest_site400), length.out=20))
pforest_site400$pred <- predict(mlow.spe4a, newdata=pforest_site400, re.form=NA, type="response")
pforest_site400$cob <- pforest_site400$forest_site400*sd(low.spe$forest_site400Orig) + mean(low.spe$forest_site400Orig)
porc <- c(20, 40)

p42 <- expand.grid(forest_site400 = seq(min(low.spe$forest_site400), max(low.spe$forest_site400), length.out=10),
                  forest_land = (porc - mean(low.spe$forest_landOrig))/sd(low.spe$forest_landOrig))
p42$pred <- predict(mlow.spe42, newdata=p42, re.form=NA, type="response")
p42$cob <- p42$forest_site400*sd(low.spe$forest_site400Orig) + mean(low.spe$forest_site400Orig)
p42$cob.paisa <- p42$forest_land*sd(low.spe$forest_landOrig) + mean(low.spe$forest_landOrig)

loc <- pforest_site400[,2:3] %>%
  mutate(cob.paisa=NA, modelo = "4a")
pais <- p42[,3:5] %>%
  mutate(modelo ="42")

flow.spe <- rbind(loc, pais) %>% mutate(dataset="low.spe")
pforest_site400 <- data.frame(forest_site400 = seq(min(high.gen$forest_site400), max(high.gen$forest_site400), length.out=20))
pforest_site400$pred <- predict(mhigh.gen4a, newdata=pforest_site400, re.form=NA, type="response")
pforest_site400$cob <- pforest_site400$forest_site400*sd(high.gen$forest_site400Orig) + mean(high.gen$forest_site400Orig)
porc <- c(20, 40)

porc <- c(20, 40)
p42 <- expand.grid(forest_site400 = seq(min(high.gen$forest_site400), max(high.gen$forest_site400), length.out=10),
                  forest_land = (porc - mean(high.gen$forest_landOrig))/sd(high.gen$forest_landOrig))
p42$pred <- predict(mhigh.gen42, newdata=p42, re.form=NA, type="response")
p42$cob <- p42$forest_site400*sd(high.gen$forest_site400Orig) + mean(high.gen$forest_site400Orig)
p42$cob.paisa <- p42$forest_land*sd(high.gen$forest_landOrig) + mean(high.gen$forest_landOrig)

loc <- pforest_site400[,2:3] %>%
  mutate(cob.paisa=NA, modelo = "4a")
pais <- p42[,3:5] %>%
  mutate(modelo ="42")

fhigh.gen <- rbind(loc, pais) %>% mutate(dataset="high.gen")
pforest_site400 <- data.frame(forest_site400 = seq(min(low.gen$forest_site400), max(low.gen$forest_site400), length.out=20))
pforest_site400$pred <- predict(mlow.gen4a, newdata=pforest_site400, re.form=NA, type="response")
pforest_site400$cob <- pforest_site400$forest_site400*sd(low.gen$forest_site400Orig) + mean(low.gen$forest_site400Orig)
porc <- c(20, 40)

p42 <- expand.grid(forest_site400 = seq(min(low.gen$forest_site400), max(low.gen$forest_site400), length.out=10),
                  forest_land = (porc - mean(low.gen$forest_landOrig))/sd(low.gen$forest_landOrig))
p42$pred <- predict(mlow.gen42, newdata=p42, re.form=NA, type="response")
p42$cob <- p42$forest_site400*sd(low.gen$forest_site400Orig) + mean(low.gen$forest_site400Orig)
p42$cob.paisa <- p42$forest_land*sd(low.gen$forest_landOrig) + mean(low.gen$forest_landOrig)

loc <- pforest_site400[,2:3] %>%
  mutate(cob.paisa=NA, modelo = "4a")
pais <- p42[,3:5] %>%
  mutate(modelo ="42")

flow.gen <- rbind(loc, pais) %>% mutate(dataset="low.gen")
labs <- c(high = "High-quality matrix", low = "Low-quality matrix",
          spe = "Specialists", gen = "Generalists")
rbind(fhigh.spe, flow.spe, fhigh.gen, flow.gen) %>%
  separate(dataset, c("matrix", "habitat")) %>%
  ggplot(aes(x=cob, y=pred, col=as.factor(cob.paisa))) +
  geom_line() +
  facet_grid(habitat~matrix, labeller=as_labeller(labs)) +
  scale_color_discrete("Landscape cover", labels=c("20%", "40%", "only local model")) +
  scale_x_continuous(limits = c(0,80), name="Local forest cover 400m (%)") +
  scale_y_continuous("Occurrence probability",
                     expand=expand_scale(add=0), limits=c(0,0.16)) +
  theme(panel.spacing = unit(5, "mm")) +
  geom_hline(yintercept = 0, size=0.8)
Predictions of the models without (gray lines) and with landscape forest cover scales (20 percent cover in red and 40 percent cover in blue lines) for specialists and generalists in both regions.

Predictions of the models without (gray lines) and with landscape forest cover scales (20 percent cover in red and 40 percent cover in blue lines) for specialists and generalists in both regions.

References

Bates, D., Mächler, M., Bolker, B. & Walker, S. (2015). Fitting Linear Mixed-Effects Models Using lme4. Journal of Statistical Software 67, 1–48.
Miller, J.E.D., Damschen, E.I. & Ives, A.R. (2018). Functional traits and community composition: A comparison among community-weighted means, weighted correlations, and multilevel models. Methods in Ecology and Evolution 0.